#### ---- Setup ---------------------------------------------------------- ####
rm(list = ls())

## Uncomment to install autumn package for comparing samples to targets: 
## devtools::install_github("aaronrudkin/autumn")

## Uncomment to install vtable package for fast descriptive tables: 
## install.packages('vtable')

## Uncomment to install crossEstimation package to implement agnostic approach
## to covariate adjustment described in SI
## devtools::install_github("swager/crossEstimation")

## Uncomment to install ggpubr package for combining plots
##install.packages('ggpubr')

suppressMessages({
  library(tidyverse) 
  library(estimatr)
  library(coefplot)
  library(ggthemes)
  library(ggforce)
  library(ggbeeswarm)
  library(xtable)
  library(autumn) 
  library(crossEstimation)
  library(grf)
})

## Helper functions needed to run the replication code below. 
source("helpers.R")

## Municipal survey of Yonkers residents 
cvs_dat <-
  read_rds("municipal_survey.rds")

## National survey of US adults 
lucid_dat <-
  read_rds("national_survey.rds")

## Combined dataset on misperceptions in long format
long_dat <- 
  read_rds("combined_long_survey.rds") %>% 
  mutate(
    sample = ifelse(str_detect(sample, "municipal"), "Municipal", "National")
  ) 

#### ---- Table S1-S3: demographic characteristics in sample v. pop ------ #####

## Table S1: 
target_pop <-
  read_rds("national_target_marginals.rds")

## Normalize to sum to 1
target_pop <- lapply(target_pop, function(x) x / sum(x))

## Check divergence between sample and target
get_current_miss(lucid_dat, target = target_pop)

## variance inflation caused by weights
design_effect(lucid_dat$weights)
effective_sample_size(lucid_dat$weights)

## Export table for SI. 
diagnose_lucid <-
  lucid_dat %>%
  diagnose_weights(weights = lucid_dat$weights,
                   target = target_pop) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = factor(
    level,
    levels = c(
      ## Sex: 
      "Female",
      "Male",
      ## Race/ethnicity:
      "White",
      "Hispanic",
      "Black",
      "AAPI",
      "Other",
      ## Age: 
      "18-24",
      "25-29",
      "30-34",
      "35-39",
      "40-44",
      "45-49",
      "50-54",
      "55-59",
      "60-64",
      "65-69",
      "70-74",
      "75+",
      ## Region:
      "South",
      "West",
      "Midwest",
      "Northeast",
      ## Educational attainment
      "No high school diploma",
      "High school diploma",
      "Some college",
      "Associate's degree",
      "Bachelor's degree",
      "Graduate degree",
      ## Income
      "$15,000 or less",
      "$15,000-$24,999",
      "$25,000-$34,999",
      "$35,000-$49,999",
      "$50,000-$74,999",
      "$75,000-$99,999",
      "$100,000-$149,999",
      "$150,000-$199,999",
      "$200,000 and above"
    )
  )) %>%
  arrange(level)

## Lucid error
mean(diagnose_lucid$error_original)
mean(diagnose_lucid$error_weighted)

## Export table
diagnose_lucid %>%
  rename(sample = prop_original,
         error = error_original) %>%
  select(level, sample, target, error) %>%
  xtable() %>%
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "benchmark_national.tex")


## Table S2: 
target_pop <-
  read_rds("municipal_target_marginals.rds")
target_pop <- lapply(target_pop, function(x) x / sum(x))

## Check divergence between sample and target
get_current_miss(cvs_dat, target = target_pop)

## variance inflation caused by weights
design_effect(cvs_dat$weights)
effective_sample_size(cvs_dat$weights)

## Export table for SI.
diagnose_cvs <-
  cvs_dat %>%
  diagnose_weights(weights = cvs_dat$weights,
                   target = target_pop) %>%
  mutate_if(is.numeric, round, 3) %>%
  mutate(level = factor(
    level,
    levels = c(
      ## Sex:
      "Female",
      "Male",
      ## Race/ethnicity:
      "White",
      "Hispanic",
      "Black",
      "AAPI",
      "Other",
      ## Age
      "18-24",
      "25-29",
      "30-34",
      "35-44",
      "45-54",
      "55-64",
      "65-74",
      "75+",
      ## Birthplace:
      "Born in USA", 
      "Not born in USA",
      ## Educational attainment
      "No high school diploma",
      "High school diploma",
      "Some college",
      "Associate's degree",
      "Bachelor's degree",
      "Graduate degree",
      ## Income:
      "$15,000 or less",
      "$15,000-$24,999",
      "$25,000-$34,999",
      "$35,000-$49,999",
      "$50,000-$74,999",
      "$75,000-$99,999",
      "$100,000-$149,999",
      "$150,000-$199,999",
      "$200,000 and above"
    )
  )) %>%
  arrange(level)

## CVS error
mean(diagnose_cvs$error_original)
mean(diagnose_cvs$error_weighted)

## Export table
diagnose_cvs %>%
  rename(sample = prop_original,
         error = error_original) %>%
  select(level, sample, target, error) %>%
  xtable() %>%
  print(include.rownames = FALSE,
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "benchmark_municipal.tex")


## Table S3:

## Population proportions are based on demographics provided by YPD for 626 
## officers employed on full-time basis: 490 White, 95 Hispanic/Latino, 
## 38 Black, 3 Asian | 538 Male, 88 Female. 
## YPD sample proportions are calculated from survey responses of 250 sworn 
## officers. These individual-level data are not public, and cannot be linked 
## to conjoint experiment to protect research participant confidentiality.


#### ---- Table S4-S6: belief accuracy and proportions over/under -------- #####

## Estimate differences for each group w/ SEs clustered at respondent level
est_diffs <-
  long_dat %>% 
  group_by(sample, group) %>% 
  do(tidy(lm_robust(
    diff ~ 1, data = .,
    clusters = person_id
  ))) %>% 
  ungroup() %>% 
  mutate_if(is.numeric, round, 2) %>% 
  arrange(desc(sample))

## Table S4
est_diffs <- 
  est_diffs %>%
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  select(sample, group, entry) %>%
  arrange(sample) %>% 
  pivot_wider(names_from = sample, values_from = entry) %>%
  mutate(group = factor(
    group,
    levels = c(
      "Male",
      "Female",
      "White",
      "Non-White",
      "Black",
      "Latino",
      "Asian"
    )
  )) %>%
  arrange(group) 

## Export
est_diffs %>%   
  xtable(., ) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = "perception_gap_means.tex"
  )

## Table S5
over_tab <-
  long_dat %>%
  filter(!(group %in% c("Male", "White"))) %>% 
  group_by(sample, group) %>%
  summarise(
    prop_5 = mean(diff > 5),
    prop_10 = mean(diff > 10),
    prop_15 = mean(diff > 15),
    prop_20 = mean(diff > 20)
  ) %>%
  ungroup() %>% 
  mutate(
    sample = ifelse(
      str_detect(sample, "National"),
      "National sample",
      "Municipal sample"
    ),
    group = factor(
      group,
      levels = c(
        "Male",
        "Female",
        "White",
        "Non-White",
        "Black",
        "Latino",
        "Asian"
      )
    )
  ) %>%
  arrange(sample, group) %>% 
  mutate_if(is.numeric, round, 2)

## Export
over_tab %>% 
  xtable(., ) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = "perception_gap_over.tex"
  )

## Table S6
under_tab <-
  long_dat %>%
  filter((group %in% c("Male", "White"))) %>% 
  group_by(sample, group) %>%
  summarise(
    prop_5 = mean(diff < -5),
    prop_10 = mean(diff < -10),
    prop_15 = mean(diff < -15),
    prop_20 = mean(diff < -20)
  ) %>%
  ungroup() %>% 
  mutate(
    sample = ifelse(
      str_detect(sample, "National"),
      "National sample",
      "Municipal sample"
    ),
    group = factor(
      group,
      levels = c(
        "Male",
        "Female",
        "White",
        "Non-White",
        "Black",
        "Latino",
        "Asian"
      )
    )
  ) %>%
  arrange(sample, group) %>% 
  mutate_if(is.numeric, round, 2)

## Export 
under_tab %>% 
  xtable(., ) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = "perception_gap_under.tex"
  )

#### ---- Table S7: estimated ATEs on survey index components ------------ #####

## names of index outcomes in municipal sample 
outcomes <-
  c(
    "y_aa_black_s",
    "y_aa_latino_s",
    "y_aa_asian_s",
    "y_aa_female_s",
    "y_trolley_black_s",
    "y_trolley_latino_s",
    "y_trolley_asian_s",
    "y_trolley_female_s"
  )

## Subset national sample to relevant arms, standardize with Glass Delta, 
## stack on top of municipal sample, and loop through all the DVs
est_dat <-
  lucid_dat %>%
  filter(Z_diversity %in% c("Control", "Info")) %>%
  mutate(
    y_aa_black_s = glass_delta(y_aa_black_n, Z = Z_diversity,
                               reference = "Control"),
    y_aa_latino_s = glass_delta(y_aa_latino_n, Z = Z_diversity,
                                reference = "Control"),
    y_aa_asian_s = glass_delta(y_aa_asian_n, Z = Z_diversity,
                               reference = "Control"),
    y_aa_female_s = glass_delta(y_aa_female_n, Z = Z_diversity,
                                reference = "Control"),
    y_trolley_black_s = glass_delta(y_trolley_black_n, Z = Z_diversity,
                                    reference = "Control"),
    y_trolley_latino_s = glass_delta(y_trolley_latino_n, Z = Z_diversity,
                                     reference = "Control"),
    y_trolley_asian_s = glass_delta(y_trolley_asian_n, Z = Z_diversity,
                                    reference = "Control"),
    y_trolley_female_s = glass_delta(y_trolley_female_n, Z = Z_diversity,
                                     reference = "Control"),
    sample = "National sample"
  ) %>%
  select(outcomes, Z_diversity, sample) %>%
  bind_rows(
    cvs_dat %>%
      filter(R_t1 == 1) %>% 
      select(Z_diversity_t1, paste0(outcomes, "_t1")) %>%
      rename_with( ~ str_remove(., "_t1")) %>%
      mutate(
        Z_diversity = ifelse(Z_diversity == "control", "Control", "Info"),
        sample = "Municipal sample"
      )
  )

## Estimate ATEs on index components for primary survey indexes 
item_est <- list()
for (i in 1:length(outcomes)) {
  item_est[[i]] <-
    est_dat %>%
    group_by(sample) %>%
    do(tidy(lm_robust(as.formula(
      paste0(outcomes[i], "~ Z_diversity")
    ),
    data = .))) %>%
    filter(term == "Z_diversityInfo") %>%
    ungroup() %>% 
    mutate_if(is.numeric, round, 3)
}

## Combine and prep for export
est_out <-
  bind_rows(item_est) %>%
  mutate(
    outcome = case_when(
      outcome == "y_aa_black_s" ~       "Affirmative action: Black officers",
      outcome == "y_aa_latino_s" ~      "Affirmative action: Hispanic/Latino officers",
      outcome == "y_aa_asian_s" ~       "Affirmative action: Asian officers",
      outcome == "y_aa_female_s" ~      "Affirmative action: Female officers",
      outcome == "y_trolley_black_s" ~  "Tie-breaking: Black applicants",
      outcome == "y_trolley_latino_s" ~ "Tie-breaking: Hispanic/Latino applicants",
      outcome == "y_trolley_asian_s" ~  "Tie-breaking: Asian applicants",
      outcome == "y_trolley_female_s" ~ "Tie-breaking: Female applicants"
    ),
    outcome = factor(
      outcome,
      levels = c(
        "Affirmative action: Black officers",
        "Affirmative action: Hispanic/Latino officers",
        "Affirmative action: Asian officers",
        "Affirmative action: Female officers",
        "Tie-breaking: Black applicants",
        "Tie-breaking: Hispanic/Latino applicants",
        "Tie-breaking: Asian applicants",
        "Tie-breaking: Female applicants"
      )
    ),
    outcome_lab = str_remove(outcome, ".*: "),
    outcome_lab = factor(
      outcome_lab,
      levels = rev(c(
        "Black officers",
        "Black applicants",
        "Hispanic/Latino officers",
        "Hispanic/Latino applicants",
        "Asian officers",
        "Asian applicants",
        "Female officers",
        "Female applicants"
      ))
    ),
    outcome_group = ifelse(
      str_detect(outcome, "Affirmative"),
      "Support for affirmative action in recruitment and hiring",
      "Support for tie-breaking in favor of minority applicants"
    )
  )

## Export to latex
est_out %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  dplyr::select(outcome, sample, entry) %>% 
  spread(sample, entry) %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "ates_index_items.tex")

#### ---- Fig S11: ATEs on survey index components ----------------------- ####
g <- 
  ggplot(est_out,
         aes(
           x = estimate,
           y = outcome_lab,
           color = sample,
           group = sample,
           fill = sample,
           shape = sample
         )) +
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "dashed",
    size = 0.4
  ) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0,
    position = position_dodgev(height = 0.7),
    size = 1.25
  ) +
  geom_point(position = position_dodgev(height = 0.7),
             size = 3,
             color = "white") +
  scale_color_manual("", values = rev(c("#888888", "#000000"))) +
  scale_fill_manual("", values = rev(c("#888888", "#000000"))) +
  scale_shape_manual("", values = rev(c(21, 22))) +
  facet_col(~ outcome_group, space = "free", scales = "free_y") +
  theme_tufte(base_size = 14) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.length.x = unit(0.2, "cm"),
    axis.ticks.x = element_line(size = 0.4),
    strip.text = element_text(size = 14, hjust = 0),
    axis.line.x = element_line(size = 0.4),
    axis.title.x = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    plot.margin = unit(c(
      t = -1,
      r = 0.5,
      b = 0.1,
      l = -0.5
    ), "lines"),
    legend.position = "none"
  ) +
  labs(subtitle = "",
       x = "Average treatment effect estimate (in standard units)",
       y = "")

p <- 
  g +
  geom_label(
    data = g$data %>%
      filter(outcome_lab == "Black officers"),
    aes(label = sample), 
    #nudge_x = 0.3,
    position = position_dodgev(height = 0.7),
    hjust = -1.2,
    color = "white",
    size = 3.5
  )

p

ggsave(p, filename = "ates_index_items.pdf", width = 6.5, height = 7)


#### ---- Table S8: estimated ATEs, with and without cov adjustment ------ #####

## NB: we did a poor job articulating which covariates would be used for
## adjustment in the PAP, and different subsets are prognostic of the
## different outcomes. An agnostic solution is to specify the list of 
## pre-treatment covariates that could be used for adjustment and 
## use machine learning estimators to automate this process. We use the 
## approach described in the SI via the crossEstimation package


#### -------- Estimation: random forest, municipal sample ---------------- ####

## Specify list of pre-treatment covariates for regression adjustment
covariates <-
  c(
    ## Baseline demos/background characteristics
    "x_female_t0",
    "x_race_cat_t0",
    "x_age_cat_t0",
    "x_immi_short_t0",
    "x_edu_short_t0",
    "x_income_cat_t0",
    "x_emp_short_t0",
    "x_pid_7n_t0",
    "x_tenure_t0",
    "x_homeowner_t0",
    ## Attention to politics at local / national level:
    "x_atnpol_local_n_t0",
    "x_atnpol_national_n_t0",
    ## Prior contact with police:
    "x_contact_ncops_n_t0",
    "x_contact_freq_n_t0",
    "x_contact_arrest_t0",
    "x_contact_recent_t0",
    "x_contact_unfair_t0",
    ## Feelings of safety and crime victimization:
    "x_safety_walk_n_t0",
    "x_safety_victim_t0",
    ## Trust and confidence in police
    "y_trust_idx_t0",
    ## Stated support for diversification
    "y_policy_supp_dvsty_n_t0",
    ## Rank ordering of diversification policy
    "y_policy_rank_dvsty_n_t0", 
    ## Trust/legitimacy index
    "y_ltc_idx_t0",
    ## Cooperation index
    "y_coop_idx_t0",
    ## Prior contact with police:
    "x_contact_recent_t1",
    "x_contact_unfair_t1",
    ## Feelings of safety and crime victimization:
    "x_safety_walk_n_t1",
    "x_safety_victim_t1",
    ## Differences between guess and truth
    "x_diverse_diff_white_t1",
    "x_diverse_diff_black_t1",
    "x_diverse_diff_latino_t1",
    "x_diverse_diff_asian_t1",
    "x_diverse_diff_female_t1",
    "x_diverse_diff_male_t1"
  )


## Specify outcomes:
outcomes <-
  c(
    "y_vote_dvsty_s_t1",
    "y_donate_s_t1",
    "y_trolley_idx_s_t1",
    "y_aa_idx_s_t1",
    "y_coop_idx_s_t1",
    "y_trust_idx_s_t1"
  )


## Subset to estimation sample and specify covariate matrix (crossEstiation
## package is not yet tidyverse friendly)
t1_dat <-
  cvs_dat %>%
  filter(R_t1 == 1) %>%
  filter_at(vars(covariates), all_vars(complete.cases(.)))

X <-
  model.matrix(as.formula(paste("~", paste(
    covariates, collapse = "+"
  ))),
  data = t1_dat)

## Treatment indicator
w <- as.numeric(t1_dat[, "Z_diversity_t1"] == "info")

## Loop over all the outcome measures, apply the algorithm and store the 
## results. 
set.seed(123)
forest_est <- list()
for (i in 1:length(outcomes)) {
  
  ## Specify outcome for this loop
  y <- t1_dat %>% pull(outcomes[i]) %>% as.numeric()
  
  tmp <-
    ate.randomForest(
      X = X,
      Y = y,
      W = w,
      conf.level = 0.95
    )
  
  forest_est[[i]] <-
    data.frame(
      estimate = tmp$tau,
      std.error = sqrt(tmp$var),
      conf.low = tmp$conf.int[1],
      conf.high = tmp$conf.int[2],
      outcome = outcomes[i]
    )
  
}

cvs_est_df <- 
  bind_rows(forest_est) %>% 
  mutate(statistic = estimate/std.error,
         p.value = 2*(1-pnorm(abs(statistic))),
         sample = "Municipal sample")

#### -------- Estimation: random forest, national sample ----------------- ####

## Specify list of pre-treatment covariates for regression adjustment
covariates <-
  c(
    "x_female",
    "x_age_cat",
    "x_edu",
    "x_safety_walk_n",
    "x_contact_freq_n",
    "x_contact_recent",
    "x_hhi_short",
    "x_pid_7n",
    "x_race",
    "x_trust_idx",
    "x_region",
    "x_usa_diff_white",
    "x_usa_diff_black",
    "x_usa_diff_latino",
    "x_usa_diff_asian",
    "x_usa_diff_male",
    "x_usa_diff_female"
  )

## Outcomes:
outcomes <-
  c("y_trolley_idx_s",
    "y_aa_idx_s")

## Subset to estimation sample and specify covariate matrix (crossEstiation
## package is not tidyverse friendly)
est_dat <-
  lucid_dat %>%
  filter(Z_diversity %in% c("Info", "Control")) %>% 
  filter_at(vars(covariates), all_vars(complete.cases(.)))

X <-
  model.matrix(as.formula(paste("~", paste(
    covariates, collapse = "+"
  ))),
  data = est_dat)

## Binary treatment indicator
w <- as.numeric(est_dat[, "Z_diversity"] == "Info")

## Loop over all the outcome measures, apply the algorithm and store the 
## results. 
set.seed(123)
forest_est <- list()
for (i in 1:length(outcomes)) {
  
  ## Specify outcome for this loop
  y <- est_dat %>% pull(outcomes[i]) %>% as.numeric()
  
  tmp <-
    ate.randomForest(
      X = X,
      Y = y,
      W = w,
      conf.level = 0.95
    )
  
  forest_est[[i]] <-
    data.frame(
      estimate = tmp$tau,
      std.error = sqrt(tmp$var),
      conf.low = tmp$conf.int[1],
      conf.high = tmp$conf.int[2],
      outcome = outcomes[i]
    )
  
}

lucid_est_df <- 
  bind_rows(forest_est) %>% 
  mutate(statistic = estimate/std.error,
         p.value = 2*(1-pnorm(abs(statistic))),
         sample = "National sample")


#### -------- Combine estimates ---------------- ####

est_adj <-
  bind_rows(cvs_est_df, lucid_est_df) %>%
  mutate(
    estimator = "Random forest",
    estimand = "ATE",
    sample = factor(sample, levels = rev(c(
      "National sample",
      "Municipal sample"
    ))),
    outcome = case_when(
      str_detect(outcome, "y_trolley_idx") ~ "Diversity hiring",
      str_detect(outcome, "y_aa_idx") ~ "Affirmative action",
      outcome == "y_vote_dvsty_s_t1" ~ "Political action",
      outcome == "y_donate_s_t1" ~ "Donations to pro-Black group",
      outcome == "y_trust_idx_s_t1" ~ "Trust in the police",
      outcome == "y_coop_idx_s_t1" ~ "Willingness to cooperate"
    ),
    outcome_lab = factor(
      outcome,
      levels = c(
        "Affirmative action",
        "Diversity hiring",
        "Political action",
        "Donations to pro-Black group",
        "Trust in the police",
        "Willingness to cooperate"
      ),
      labels = c(
        "Support for affirmative action in recruitment and hiring",
        "Support for tie-breaking in favor of minority applicants",
        "Voted for police department diversification",
        "Donation to Black police officer association",
        "Trust and confidence in the police department",
        "Willingness to cooperate with police officers"
      )
    )
  ) %>% 
  select(estimate, std.error, conf.low, conf.high, statistic, p.value, sample, 
         outcome, estimator, estimand, outcome_lab)

## Combine with unadjusted estimates (created by manuscript.R) 
est_unadj <- read_rds("estimated_ates_manuscript.rds")

est_ates <- 
  bind_rows(est_adj, est_unadj) %>% 
  mutate_if(is.numeric, round, 3)

## Export table of ATEs (with and without adjustment) for SI
est_ates %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  dplyr::select(outcome_lab, entry, estimator, sample) %>% 
  spread(estimator, entry) %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "adj_unadj_ates.tex")


#### ---- Table S9-S10: additional pre-treatment covs by sample ---------- #####

## National sample (Table S9)
covariates <-
  c(
    "x_safety_walk_n",
    "x_contact_freq_n",
    "x_contact_recent",
    "x_pid_7n",
    "x_trust_idx",
    "x_usa_diff_white",
    "x_usa_diff_black",
    "x_usa_diff_latino",
    "x_usa_diff_asian",
    "x_usa_diff_male",
    "x_usa_diff_female"
  )


lucid_dat %>%
  filter(Z_diversity %in% c("Control", "Info")) %>% 
  select(covariates) %>%
  vtable::st(.,
             summ = c('mean(x)',
                      'sd(x)',
                      'min(x)',
                      'max(x)',
                      'notNA(x)'),
             digits = 2, 
             fixed.digits = TRUE,
             out = "latex")

## Municipal sample (Table S10)
these_covs_m <-
  c(## Prior experience w/ police & crime:
    "x_contact_ncops_n_t0",
    "x_contact_freq_n_t0",
    "x_contact_recent_t0",
    "x_contact_arrest_t0",
    "x_contact_unfair_t0",
    ## Feelings of safety and crime victimization:
    "x_safety_walk_n_t0",
    "x_safety_victim_t0",
    ## Other pre-treatment measures from baseline survey:
    ## Years living in Yonkers and homeownership
    "x_tenure_t0",
    "x_homeowner_t0",
    ## Partisanship
    "x_pid_7n_t0",
    ## Attention to politics at local / national level:
    "x_atnpol_local_n_t0",
    "x_atnpol_national_n_t0",
    ## Trust and confidence in police (index)
    "y_trust_idx_t0",
    ## Cooperation index
    "y_coop_idx_t0",
    ## Stated support for diversification policy
    "y_policy_supp_dvsty_n_t0",
    ## Rank ordering of diversification policy
    "y_policy_rank_dvsty_n_t0", 
    ## Trust/Confidence/legitimacy index
    "y_ltc_idx_t0",
    ## Differences between guess and truth in followup survey
    "x_diverse_diff_white_t1",
    "x_diverse_diff_black_t1",
    "x_diverse_diff_latino_t1",
    "x_diverse_diff_asian_t1",
    "x_diverse_diff_female_t1",
    "x_diverse_diff_male_t1",
    ## Other pre-treatment measures in followup survey:
    "x_contact_recent_t1",
    "x_contact_unfair_t1",
    "x_safety_walk_n_t1",
    "x_safety_victim_t1"
  )


cvs_dat %>%
  filter(R_t1 == 1) %>% 
  mutate(
    x_emp_work = as.numeric(x_emp_long_t0 == "Working"),
    x_emp_unem = as.numeric(x_emp_long_t0 == "Not working"),
    x_emp_ret = as.numeric(x_emp_long_t0 == "Retired")
  ) %>%
  select(these_covs_m[1:7],
         x_emp_work,
         x_emp_unem,
         x_emp_ret,
         these_covs_m[8:27]) %>%
  vtable::st(.,
             summ = c('mean(x)',
                      'sd(x)',
                      'min(x)',
                      'max(x)',
                      'notNA(x)'),
             digits = 2, 
             fixed.digits = TRUE,
             out = "latex")



#### ---- Causal forest estimated CATEs (S2.1.3) ------------------------- ####

## Note that the below code chunks for estimating CATEs using causal forest
## are optional. They may take a long time to execute depending on your machine.
## The estimates have been saved and can simply be loaded below. After loading
## these estimates you can skip the optional code chunks and proceed directly 
## to creating the tables/figures

## Causal forest estimated CATEs and test statistics for municipal sample:
grf_municipal <- read_rds("grf_municipal.rds")
grf_municipal_test <- read_rds("grf_municipal_test.rds")

## Causal forest estimated CATEs and test statistics for national sample:
grf_national <- read_rds("grf_national.rds")
grf_national_test <- read_rds("grf_national_test.rds")

#### -------- Estimation (optional): causal forest, municipal sample ----- ####
## Specify list of pre-treatment covariates 
covariates <-
  c(
    ## Baseline demos/background characteristics
    "x_female_t0",
    "x_race_cat_t0",
    "x_age_cat_t0",
    "x_immi_short_t0",
    "x_edu_short_t0",
    "x_income_cat_t0",
    "x_emp_short_t0",
    "x_pid_7n_t0",
    "x_tenure_t0",
    "x_homeowner_t0",
    ## Attention to politics at local / national level:
    "x_atnpol_local_n_t0",
    "x_atnpol_national_n_t0",
    ## Prior contact with police:
    "x_contact_ncops_n_t0",
    "x_contact_freq_n_t0",
    "x_contact_arrest_t0",
    "x_contact_recent_t0",
    "x_contact_unfair_t0",
    ## Feelings of safety and crime victimization:
    "x_safety_walk_n_t0",
    "x_safety_victim_t0",
    ## Trust and confidence in police
    "y_trust_idx_t0",
    ## Stated support for diversification
    "y_policy_supp_dvsty_n_t0",
    ## Rank ordering of diversification policy
    "y_policy_rank_dvsty_n_t0", 
    ## Trust/legitimacy index
    "y_ltc_idx_t0",
    ## Cooperation index
    "y_coop_idx_t0",
    ## Prior contact with police:
    "x_contact_recent_t1",
    "x_contact_unfair_t1",
    ## Feelings of safety and crime victimization:
    "x_safety_walk_n_t1",
    "x_safety_victim_t1",
    ## Differences between guess and truth
    "x_diverse_diff_white_t1",
    "x_diverse_diff_black_t1",
    "x_diverse_diff_latino_t1",
    "x_diverse_diff_asian_t1",
    "x_diverse_diff_female_t1",
    "x_diverse_diff_male_t1"
  )

## List of outcomes
outcomes <-
  c(
    "y_vote_dvsty_t1",
    "y_donate_n_t1",
    "y_trolley_idx_t1",
    "y_aa_idx_t1",
    "y_coop_idx_t1",
    "y_trust_idx_t1"
  )

# NB: GRF only accepts rows with complete cases on DVs and all 
# covariates. 
est_dat <- 
  cvs_dat %>% 
  filter(R_t1 == 1) %>% 
  select(covariates, Z_diversity_t1, outcomes)

index <- complete.cases(est_dat)
X <- est_dat[index, c(covariates, "Z_diversity_t1")]

W <- as.numeric(X$Z_diversity_t1 == "info")

## Recode covariates to numeric
X$x_white <- as.numeric(X$x_race_cat_t0 == "White") 
X$x_black <- as.numeric(X$x_race_cat_t0 == "Black") 
X$x_hispanic <- as.numeric(X$x_race_cat_t0 == "Hispanic") 
## Combine rarely sampled sub-groups
X$x_other <- as.numeric(X$x_race_cat_t0 %in% c("AAPI", "Other")) 
X$x_immi <- as.numeric(X$x_immi_short_t0 == "Not born in USA")
X$x_age <- as.integer(factor(
  X$x_age_cat_t0,
  levels = c(
    "18-24",
    "25-29",
    "30-34",
    "35-44",
    "45-54",
    "55-64",
    "65-74",
    "75+"
  )
))
X$x_hhi <- as.integer(X$x_income_cat_t0)
X$x_edu <- as.integer(X$x_edu_short_t0)

## Keep numeric covariates for GRF: 
nums <- unlist(lapply(X, is.numeric))  
X <- as.matrix(X[, nums])

# Step 1. Train a causal forest using the observed outcome (Y_Obs), the matrix
# of pre-treatment covariates (X), and reatment assignment vector (W).
set.seed(123)

## Support for affirmative action in recruitment and hiring
Y_Obs <- est_dat$y_aa_idx_t1[index]
tau_aa <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Support for tie-breaking in favor of minority applicants
Y_Obs <- est_dat$y_trolley_idx_t1[index]

tau_trolley <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Voted for police department diversification
Y_Obs <- est_dat$y_vote_dvsty_t1[index]
tau_vote <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Donation to Black police officer association
Y_Obs <- est_dat$y_donate_n_t1[index]
tau_donate <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Trust and confidence in police department
Y_Obs <- est_dat$y_trust_idx_t1[index]
tau_trust <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Willingness to cooperate with police officers
Y_Obs <- est_dat$y_coop_idx_t1[index]
tau_coop <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Compute tests for heterogeneity and store results.
grf_municipal_test <-
  bind_rows(
    "Support for affirmative action in recruitment and hiring" = tidy(test_calibration(tau_aa)),
    "Support for tie-breaking in favor of minority applicants" = tidy(test_calibration(tau_trolley)),
    "Voted for police department diversification" = tidy(test_calibration(tau_vote)),
    "Donation to Black police officer association" = tidy(test_calibration(tau_donate)),
    "Trust and confidence in the police department" = tidy(test_calibration(tau_trust)),
    "Willingness to cooperate with police officers" = tidy(test_calibration(tau_coop)),
    .id = "outcome"
  ) %>%
  mutate_if(is.numeric, round, 2) %>%
  mutate(sample = "Municipal sample")


# Step 2. Estimate the treatment effects for all units using
# out-of-bag prediction.
tau_hat_aa <- predict(tau_aa, estimate.variance = TRUE)
tau_hat_trolley <- predict(tau_trolley, estimate.variance = TRUE)
tau_hat_vote <- predict(tau_vote, estimate.variance = TRUE)
tau_hat_donate <- predict(tau_donate, estimate.variance = TRUE)
tau_hat_trust <- predict(tau_trust, estimate.variance = TRUE)
tau_hat_coop <- predict(tau_coop, estimate.variance = TRUE)

# Step 3. Compute estimated SEs treatment effects for predictions. 
sigma_hat_aa <- sqrt(tau_hat_aa$variance.estimates)
sigma_hat_trolley <- sqrt(tau_hat_trolley$variance.estimates)
sigma_hat_vote <- sqrt(tau_hat_vote$variance.estimates)
sigma_hat_donate <- sqrt(tau_hat_donate$variance.estimates)
sigma_hat_trust <- sqrt(tau_hat_trust$variance.estimates)
sigma_hat_coop <- sqrt(tau_hat_coop$variance.estimates)

# Step 4. Store the estimated treatment effects and CIs in a data frame.
grf_aa <- 
  cbind(data.frame(
    tau_hat = tau_hat_aa$predictions,
    ci_upper = tau_hat_aa$predictions + 1.96 * sigma_hat_aa,
    ci_lower = tau_hat_aa$predictions - 1.96 * sigma_hat_aa),
    X
  ) %>% 
  mutate(outcome = "Support for affirmative action in recruitment and hiring")

grf_trolley <- 
  cbind(data.frame(
    tau_hat = tau_hat_trolley$predictions,
    ci_upper = tau_hat_trolley$predictions + 1.96 * sigma_hat_trolley,
    ci_lower = tau_hat_trolley$predictions - 1.96 * sigma_hat_trolley),
    X
  ) %>% 
  mutate(outcome = "Support for tie-breaking in favor of minority applicants")

grf_vote <- 
  cbind(data.frame(
    tau_hat = tau_hat_vote$predictions,
    ci_upper = tau_hat_vote$predictions + 1.96 * sigma_hat_vote,
    ci_lower = tau_hat_vote$predictions - 1.96 * sigma_hat_vote),
    X
  ) %>% 
  mutate(outcome = "Voted for police department diversification")

grf_donate <- 
  cbind(data.frame(
    tau_hat = tau_hat_donate$predictions,
    ci_upper = tau_hat_donate$predictions + 1.96 * sigma_hat_donate,
    ci_lower = tau_hat_donate$predictions - 1.96 * sigma_hat_donate),
    X
  ) %>% 
  mutate(outcome = "Donation to Black police officer association")

grf_trust <- 
  cbind(data.frame(
    tau_hat = tau_hat_trust$predictions,
    ci_upper = tau_hat_trust$predictions + 1.96 * sigma_hat_trust,
    ci_lower = tau_hat_trust$predictions - 1.96 * sigma_hat_trust),
    X
  ) %>% 
  mutate(outcome = "Trust and confidence in the police department")

grf_coop <- 
  cbind(data.frame(
    tau_hat = tau_hat_coop$predictions,
    ci_upper = tau_hat_coop$predictions + 1.96 * sigma_hat_coop,
    ci_lower = tau_hat_coop$predictions - 1.96 * sigma_hat_coop),
    X
  ) %>% 
  mutate(outcome = "Willingness to cooperate with police officers")

# Order treatment effects by size
grf_municipal <-
  bind_rows(grf_aa, grf_trolley, grf_vote, grf_donate, grf_trust, grf_coop) %>%
  mutate(outcome = factor(
    outcome,
    levels = c(
      "Support for affirmative action in recruitment and hiring",
      "Support for tie-breaking in favor of minority applicants",
      "Voted for police department diversification",
      "Donation to Black police officer association",
      "Trust and confidence in the police department",
      "Willingness to cooperate with police officers"
    )
  )) %>%
  group_by(outcome) %>%
  mutate(order = rank(tau_hat, ties.method = "first"),
         sample = "Municipal sample") %>%
  ungroup() 

## Export results
# write_rds(grf_municipal,
#           file = "grf_municipal.rds")
# 
# write_rds(grf_municipal_test,
#           file = "grf_municipal_test.rds")

#### -------- Estimation (optional): causal forest, national sample ------ ####

## Specify list of pre-treatment covariates:
covariates <-
  c(
    "x_female",
    "x_age_cat",
    "x_edu",
    "x_safety_walk_n",
    "x_contact_freq_n",
    "x_contact_recent",
    "x_hhi_short",
    "x_pid_7n",
    "x_race",
    "x_trust_idx",
    "x_region",
    "x_usa_diff_white",
    "x_usa_diff_black",
    "x_usa_diff_latino",
    "x_usa_diff_asian",
    "x_usa_diff_male",
    "x_usa_diff_female"
  )

## Outcomes:
outcomes <-
  c("y_trolley_idx",
    "y_aa_idx")

# NB: GRF only accepts rows with complete cases on DVs and all 
# covariates. 

## Subset to treatment arms that exist in municipal experiment for comparisons
est_dat <- 
  lucid_dat %>% 
  filter(Z_diversity %in% c("Control", "Info")) %>% 
  select(covariates, Z_diversity, outcomes)

index <- complete.cases(est_dat)
X <- est_dat[index, c(covariates, "Z_diversity")]

W <- as.numeric(X$Z_diversity == "Info")

## Recode covariates to numeric
X$x_white <- as.numeric(X$x_race == "White") 
X$x_black <- as.numeric(X$x_race == "Black") 
X$x_hispanic <- as.numeric(X$x_race == "Hispanic") 
## Combine rarely sampled sub-groups
X$x_other <- as.numeric(X$x_race %in% c("AAPI", "Other")) 
X$x_west <- as.numeric(X$x_region == "West")
X$x_midwest <- as.numeric(X$x_region == "Midwest")
X$x_south <- as.numeric(X$x_region == "South")
X$x_northeast <- as.numeric(X$x_region == "Northeast")
X$x_age <- as.integer(X$x_age_cat)
X$x_hhi <- as.integer(X$x_hhi_short)
X$x_edu <- as.integer(X$x_edu)

nums <- unlist(lapply(X, is.numeric))  
X <- as.matrix(X[, nums])

# Step 1. Train a causal forest using the observed outcome (Y_Obs), the matrix
# of pre-treatment covariates (X), and reatment assignment vector (W).
set.seed(123)

## Support for affirmative action in recruitment and hiring
Y_Obs <- est_dat$y_aa_idx[index]
tau_aa <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

## Support for tie-breaking in favor of minority applicants
Y_Obs <- est_dat$y_trolley_idx[index]
tau_trolley <-
  causal_forest(
    Y = Y_Obs,
    X = X,
    W = W,
    honesty = TRUE,
    num.trees = 4000,
    seed = 123
  )

grf_national_test <-
  bind_rows(
    "Support for affirmative action in recruitment and hiring" = tidy(test_calibration(tau_aa)),
    "Support for tie-breaking in favor of minority applicants" = tidy(test_calibration(tau_trolley)),
    .id = "outcome"
  ) %>%
  mutate_if(is.numeric, round, 2) %>%
  mutate(sample = "National sample")

# Step 2. Estimate the treatment effects for all units using
# out-of-bag prediction.
tau_hat_aa <- predict(tau_aa, estimate.variance = TRUE)
tau_hat_trolley <- predict(tau_trolley, estimate.variance = TRUE)

# Step 3. Compute estimated SEs treatment effects for predictions. 
sigma_hat_aa <- sqrt(tau_hat_aa$variance.estimates)
sigma_hat_trolley <- sqrt(tau_hat_trolley$variance.estimates)

# Step 4. Store the estimated treatment effects and CIs in a data frame.
grf_aa <- 
  cbind(data.frame(
    tau_hat = tau_hat_aa$predictions,
    ci_upper = tau_hat_aa$predictions + 1.96 * sigma_hat_aa,
    ci_lower = tau_hat_aa$predictions - 1.96 * sigma_hat_aa),
    X
  ) %>% 
  mutate(outcome = "Support for affirmative action in recruitment and hiring")

grf_trolley <- 
  cbind(data.frame(
    tau_hat = tau_hat_trolley$predictions,
    ci_upper = tau_hat_trolley$predictions + 1.96 * sigma_hat_trolley,
    ci_lower = tau_hat_trolley$predictions - 1.96 * sigma_hat_trolley),
    X
  ) %>% 
  mutate(outcome = "Support for tie-breaking in favor of minority applicants")


# Order treatment effects by size
grf_national <-
  bind_rows(grf_aa, grf_trolley) %>%
  mutate(outcome = factor(
    outcome,
    levels = c(
      "Support for affirmative action in recruitment and hiring",
      "Support for tie-breaking in favor of minority applicants"
    )
  )) %>%
  group_by(outcome) %>%
  mutate(order = rank(tau_hat, ties.method = "first"),
         sample = "National sample") %>%
  ungroup() 

# # Export results
# write_rds(grf_national,
#           file = "grf_national.rds")
# 
# 
# write_rds(grf_national_test,
#           file = "grf_national_test.rds")


#### ---- Fig. S13-S14: plot causal forest estimates by sample ----------- ####

## Combine estimates for plotting:
plot_df <- 
  bind_rows(grf_municipal, grf_national)

grf_plot_m <-
  plot_df %>%
  filter(sample == "Municipal sample") %>% 
  ggplot(., aes(x = tau_hat, y = order)) + 
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), 
                 col = "dimgrey",
                 alpha = 0.2, 
                 height = 0, 
                 size = 0.5) + 
  geom_point(size = 1, pch = 20,
             alpha = 0.8, 
             fill = "black",
             col = "black"
  ) +
  geom_vline(xintercept = 0, size = 0.5, lty = 2) +
  labs(title = "",
       x = "",
       y = "") + 
  facet_wrap(~ outcome, scales = "free_x", ncol = 2) +
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_x_continuous(expand = c(0.05,0.05)) + 
  theme_tufte() +
  theme(axis.ticks.y = element_blank(),
        axis.ticks.x = element_line(size = 0.25),
        strip.text = element_text(size = 11),
        axis.line.x = element_line(size = 0.25),
        axis.title.x = element_text(size = 10),
        axis.text.y = element_blank(),
        axis.line.y = element_blank(),
        panel.border = element_rect(fill = NA),
        strip.background = element_rect(fill = "lightgrey"),
        plot.margin = unit(c(t = -0.5, r = 0.5, b = 0.1, l = -0.5), "lines")
  ) 

ggsave(grf_plot_m,
       filename = "grf_municipal.pdf",
       height = 12, width = 7.5, device = "pdf", dpi = 1000)

grf_plot_n <-
  plot_df %>%
  filter(sample == "National sample") %>% 
  ggplot(., aes(x = tau_hat, y = order)) + 
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), 
                 col = "dimgrey",
                 alpha = 0.2, 
                 height = 0, 
                 size = 0.5) + 
  geom_point(size = 1, pch = 20,
             alpha = 0.8, 
             fill = "black",
             col = "black"
  ) +
  geom_vline(xintercept = 0, size = 0.5, lty = 2) +
  labs(title = "",
       x = "",
       y = "") + 
  facet_wrap(~ outcome, scales = "free_x", ncol = 2) +
  scale_y_continuous(expand = c(0.01,0.01)) + 
  scale_x_continuous(expand = c(0.05,0.05)) + 
  theme_tufte() +
  theme(axis.ticks.y = element_blank(),
        axis.ticks.x = element_line(size = 0.25),
        strip.text = element_text(size = 11),
        axis.line.x = element_line(size = 0.25),
        axis.title.x = element_text(size = 10),
        axis.text.y = element_blank(),
        axis.line.y = element_blank(),
        panel.border = element_rect(fill = NA),
        strip.background = element_rect(fill = "lightgrey"),
        plot.margin = unit(c(t = -0.5, r = 0.5, b = 0.1, l = -0.5), "lines")
  ) 


ggsave(grf_plot_n,
       filename = "grf_national.pdf",
       height = 5, width = 7.5, device = "pdf", dpi = 1000)

#### ---- Table S11: omnibus test for heterogeneity ---------------------- #####

## Test statistics for heterogeneity: 
forest_test_df <-
  bind_rows(grf_municipal_test,
            grf_national_test) %>%
  mutate(outcome = factor(
    outcome,
    levels = c(
      "Support for affirmative action in recruitment and hiring",
      "Support for tie-breaking in favor of minority applicants",
      "Voted for police department diversification",
      "Donation to Black police officer association",
      "Trust and confidence in the police department",
      "Willingness to cooperate with police officers"
    )
  ))

forest_test_df %>% 
  filter(!str_detect(term, "mean.forest")) %>% 
  arrange(outcome) %>% 
  select(-term) %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "grf_test_out.tex")

#### ---- Table S12: summary of causal forest estimates ------------------ #####

## What proportion of point estimates are positive/negative? 
prop_est <- 
  plot_df %>% 
  group_by(outcome, sample) %>% 
  summarise(prop_pos = mean(tau_hat > 0),
            prop_neg = mean(tau_hat < 0)) 

# What propotion of CIs include / exclude zero? 
prop_coverage <- 
  plot_df %>% 
  group_by(outcome, sample) %>%
  summarise(
    include_zero = mean(ci_lower <= 0 & ci_upper >= 0),
    exclude_zero = 1 - include_zero
  )

# What proportion of the CIs that don't include zero are positive/negative? 
these <- with(plot_df, which(ci_lower <= 0 & ci_upper >= 0))
prop_sig <- 
  plot_df[-these,] %>% 
  group_by(outcome, sample) %>% 
  summarise(prop_sig_pos = mean(tau_hat > 0),
            prop_sig_neg = mean(tau_hat < 0))

## Combine for summary table and export 
grf_summary <- 
  left_join(prop_est, prop_coverage) %>% 
  left_join(., prop_sig) %>% 
  ungroup() %>% 
  mutate_if(is.numeric, round,  2)

grf_summary %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "grf_summary.tex")


#### ---- Plot causal forest estimates by pre-treatment covariates ------- ####

## Prep subset of estimates for municipal sample
plot_df_m <- 
  plot_df %>%
  filter(sample == "Municipal sample") %>%
  select(outcome, tau_hat, ci_upper, ci_lower, starts_with("x_diverse")) %>%
  gather(key = "X", value = "value", starts_with("x_diverse")) %>%
  mutate(
    X = case_when(
      str_detect(X, "white") ~ "White",
      str_detect(X, "black") ~ "Black",
      str_detect(X, "latino") ~ "Hispanic/Latino",
      str_detect(X, "asian") ~ "Asian/Other",
      str_detect(X, "female") ~ "Female",
      str_detect(X, "male") ~ "Male"
    ),
    X = factor(
      X,
      levels = c(
        "White",
        "Black",
        "Hispanic/Latino",
        "Asian/Other",
        "Male",
        "Female"
      )
    )
  ) 

## Prep subset of estimates for national sample
plot_df_n <- 
  plot_df %>%
  filter(sample == "National sample") %>%
  select(outcome, tau_hat, ci_upper, ci_lower, starts_with("x_usa_diff")) %>%
  gather(key = "X", value = "value", starts_with("x_usa_diff")) %>%
  mutate(
    X = case_when(
      str_detect(X, "white") ~ "White",
      str_detect(X, "black") ~ "Black",
      str_detect(X, "latino") ~ "Hispanic/Latino",
      str_detect(X, "asian") ~ "Asian/Other",
      str_detect(X, "female") ~ "Female",
      str_detect(X, "male") ~ "Male"
    ),
    X = factor(
      X,
      levels = c(
        "White",
        "Black",
        "Hispanic/Latino",
        "Asian/Other",
        "Male",
        "Female"
      )
    )
  ) 

#### ---- Fig. S15-S16: predicted - actual share of White officers ------- ####
m_white <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("White", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of White police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_whitecop_m.pdf", 
       m_white, 
       width = 7.5,
       height = 8)


n_white <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("White", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of White police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_whitecop_n.pdf",
       n_white, 
       width = 7.5,
       height = 3.5)


#### ---- Fig. S17-S18: predicted - actual share of Black officers ------- ####
m_black <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("black", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Black police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_blackcop_m.pdf", 
       m_black, 
       width = 7.5,
       height = 8)


n_black <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("black", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Black police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_blackcop_n.pdf",
       n_black,
       width = 7.5,
       height = 3.5)

#### ---- Fig. S19-S20: predicted - actual share of Latino officers ------ ####
m_latino <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("latino", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Hispanic/Latino police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_latinocop_m.pdf",
       m_latino, 
       width = 7.5,
       height = 8)


n_latino <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("latino", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Hispanic/Latino police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_latinocop_n.pdf",
       n_latino, 
       width = 7.5,
       height = 3.5)


#### ---- Fig. S21-S22: predicted - actual share of Asian officers ------- ####
m_asian <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("asian", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Asian police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_asiancop_m.pdf",
       m_asian, 
       width = 7.5,
       height = 8)


n_asian <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("asian", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of Asian police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_asiancop_n.pdf", 
       n_asian, 
       width = 7.5,
       height = 3.5)


#### ---- Fig. S23-S24: predicted - actual share of Male officers -------- ####
m_male <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("Male", ignore_case = FALSE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of male police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_malecop_m.pdf", 
       m_male, 
       width = 7.5,
       height = 8)


n_male <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("Male", ignore_case = FALSE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of male police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_malecop_n.pdf",
       n_male, 
       width = 7.5,
       height = 3.5)


#### ---- Fig. S25-S26: predicted - actual share of Female officers ------ ####
m_female <- 
  plot_df_m %>% 
  filter(str_detect(X, regex("female", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of female police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_femalecop_m.pdf", 
       m_female, 
       width = 7.5,
       height = 8)


n_female <- 
  plot_df_n %>% 
  filter(str_detect(X, regex("female", ignore_case = TRUE))) %>%
  ggplot(.,aes(y = tau_hat, x = value, group = X)) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_jitter(alpha = 0.8,
              pch = 21,
              fill = "darkgrey"
  ) +
  geom_smooth(method = "lm_robust", se = FALSE, col = "black", size = 0.75) + 
  facet_wrap( ~ outcome, ncol = 2, scale = "free_y") +
  theme_tufte() +
  labs(x = "Predicted - actual share of female police officers",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_femalecop_n.pdf",
       n_female, 
       width = 7.5,
       height = 3.5)



#### ---- Fig. S27-S28: CATEs by respondent race/ethnicity --------------- ####
m_race <-
  plot_df %>%
  filter(sample == "Municipal sample") %>%
  mutate(
    x_race = case_when(
      x_white == 1 ~ "White",
      x_black == 1 ~ "Black",
      x_hispanic == 1 ~ "Hispanic/Latino",
      x_other == 1 ~ "Other"
    ),
    x_race = factor(x_race, levels = c(
      "White", "Black", "Hispanic/Latino",
      "Other"
    ))
  ) %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = x_race,
           color = x_race,
           fill = x_race
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  scale_color_manual("", values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")) +
  scale_fill_manual("", values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")) +
  facet_wrap(~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  )

ggsave(filename = "grf_by_race_m.pdf",
       m_race, 
       width = 7.5,
       height = 8)


n_race <- 
  plot_df %>%
  filter(sample == "National sample") %>%
  mutate(
    x_race = case_when(
      x_white == 1 ~ "White",
      x_black == 1 ~ "Black",
      x_hispanic == 1 ~ "Hispanic/Latino",
      x_other == 1 ~ "Other"
    ),
    x_race = factor(x_race, levels = c(
      "White", "Black", "Hispanic/Latino",
      "Other"
    ))
  ) %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = x_race,
           color = x_race,
           fill = x_race
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  scale_color_manual("", values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")) +
  scale_fill_manual("", values = c("#E69F00", "#56B4E9", "#009E73", "#CC79A7")) +
  facet_wrap(~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_race_n.pdf",
       n_race, 
       width = 7.5,
       height = 3.5)

#### ---- Fig. S29-S30: CATEs by respondent sex -------------------------- ####
m_sex <- 
  plot_df %>%
  filter(sample == "Municipal sample") %>%
  mutate(
    x_sex = ifelse(x_female_t0 == 1, "Female", "Male")
  ) %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = x_sex,
           color = x_sex,
           fill = x_sex
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  scale_color_manual("", values = c("#E69F00", "#56B4E9")) +
  scale_fill_manual("", values = c("#E69F00", "#56B4E9")) +
  facet_wrap( ~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_sex_m.pdf",
       m_sex, 
       width = 7.5,
       height = 8)


n_sex <- 
  plot_df %>%
  filter(sample == "National sample") %>%
  mutate(
    x_sex = ifelse(x_female == 1, "Female", "Male")
  ) %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = x_sex,
           color = x_sex,
           fill = x_sex
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  scale_color_manual("", values = c("#E69F00", "#56B4E9")) +
  scale_fill_manual("", values = c("#E69F00", "#56B4E9")) +
  facet_wrap(~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 10),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 0,
      l = 0.1
    ), "lines"),
    legend.position = "none",
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) 

ggsave(filename = "grf_by_sex_n.pdf",
       n_sex, 
       width = 7.5,
       height = 3.5)

#### ---- Fig. S31-S32: CATEs by respondent partisanship ----------------- ####
bpr_colors <- c("#1F3A93", "#7C2C55", "#D91E18")

pid_labs <- levels(cvs_dat$x_pid_7_t0)

m_pid <-
  plot_df %>%
  filter(sample == "Municipal sample") %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = factor(x_pid_7n_t0),
           color = x_pid_7n_t0,
           fill = x_pid_7n_t0
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  scale_fill_gradient2(
    name = "",
    labels = pid_labs,
    low = bpr_colors[1],
    mid = bpr_colors[2],
    high = bpr_colors[3],
    midpoint = 4,
    guide = "none"
  ) +
  scale_color_gradient2(
    name = "",
    labels = pid_labs,
    low = bpr_colors[1],
    mid = bpr_colors[2],
    high = bpr_colors[3],
    midpoint = 4,
    guide = "legend"
  ) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  facet_wrap( ~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 11),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 2,
      l = 0.1
    ), "lines"),
    legend.position = c(.5, -0.025),
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) +
  guides(fill = guide_legend(
    nrow = 2,
    byrow = TRUE,
    override.aes = list(
      size = 2,
      alpha = 1,
      key_glyph = "polygon3"
    )
  )) 


ggsave(filename = "grf_by_pid_m.pdf",
       m_pid, 
       width = 8.5,
       height = 9.5)


n_pid <-
  plot_df %>%
  filter(sample == "National sample") %>%
  ggplot(.,
         aes(
           y = tau_hat,
           x = factor(x_pid_7n),
           color = x_pid_7n,
           fill = x_pid_7n
         )) +
  geom_hline(
    yintercept = 0,
    color = "black",
    lty = 2,
    size = 0.5
  ) +
  geom_point(alpha = 0.5,
             pch = 21,
             position = position_dodge2(.5)) +
  scale_fill_gradient2(
    name = "",
    labels = pid_labs,
    low = bpr_colors[1],
    mid = bpr_colors[2],
    high = bpr_colors[3],
    midpoint = 4,
    guide = "none"
  ) +
  scale_color_gradient2(
    name = "",
    labels = pid_labs,
    low = bpr_colors[1],
    mid = bpr_colors[2],
    high = bpr_colors[3],
    midpoint = 4,
    guide = "legend"
  ) +
  geom_boxplot(
    fill = NA,
    color = "black",
    outlier.shape = NA,
    notch = TRUE
  ) +
  facet_wrap( ~ outcome, scale = "free_y", ncol = 2) +
  theme_tufte() +
  labs(x = "",
       y = expression(hat(tau)^(-i)~(X[i])))  +
  theme(
    strip.text = element_text(size = 11),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.margin = unit(c(
      t = 0.5,
      r = 0.5,
      b = 2,
      l = 0.1
    ), "lines"),
    legend.position = c(.5, -0.025),
    legend.spacing = unit(0.1, 'lines'),
    legend.spacing.x = unit(0.1, 'lines'),
    legend.spacing.y = unit(0.3, 'lines'),
    legend.key = element_rect(color = NA, fill = NA),
    panel.border = element_rect(fill = NA),
    strip.background = element_rect(fill = "lightgrey"),
    legend.key.size = unit(0.4, "cm")
  ) +
  guides(fill = guide_legend(
    nrow = 2,
    byrow = TRUE,
    override.aes = list(
      size = 2,
      alpha = 1,
      key_glyph = "polygon3"
    )
  )) 


ggsave(filename = "grf_by_pid_n.pdf",
       n_pid, 
       width = 8,
       height = 4)

#### ---- IV estimates on primary outcomes in municipal experiment ------- ####

## Rescale manipulation check question so that higher values indicate 
## stronger disagreement
cvs_dat <-
  cvs_dat %>%
  mutate(D = 8 - y_diverse_manip_n_t1)

round(table(cvs_dat$D[cvs_dat$Z_diversity_t1 == "info"]) /
        sum(table(cvs_dat$D[cvs_dat$Z_diversity_t1 == "info"])), 2)

round(table(cvs_dat$D[cvs_dat$Z_diversity_t1 == "control"]) /
        sum(table(cvs_dat$D[cvs_dat$Z_diversity_t1 == "control"])), 2)

## What proportions provided response above neutral midpoint? 
cvs_dat %>%
  group_by(Z_diversity_t1) %>% 
  summarise(prop = mean(D > 4))

## Average in each group
cvs_dat %>% 
  filter(!is.na(Z_diversity_t1)) %>% 
  group_by(Z_diversity_t1) %>% 
  do(tidy(lm_robust(D ~ 1, data = .)))

#### -------- First stage estimates for ITTd ----------------------------- ####
first_stage_est <-
  lm_robust(D ~ Z_diversity_t1, data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info") %>% 
  mutate(estimator = "OLS",
         estimand = "ITTd")

## Run this for f-statistic: 32.44 (P < 0.001)
lm_robust(D ~ Z_diversity_t1, data = cvs_dat) %>%
  summary()

#### -------- Reduced form estimates for ITTy ---------------------------- ####

## Affirmative action hiring (y_aa_idx)
est_aa_idx <-
  lm_robust(y_aa_idx_t1 ~ Z_diversity_t1, cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Hiring minority applicants (y_trolley_idx) 
est_trolley_idx <- 
  lm_robust(y_trolley_idx_t1 ~ Z_diversity_t1,
            data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Donations to pro-Black organization (y_donate) 
est_donate <- 
  cvs_dat %>%
  lm_robust(y_donate_n_t1 ~ Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Political action (y_vote_dvsty)
est_vote <- 
  cvs_dat %>%
  lm_robust(y_vote_dvsty_t1 ~ Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Trust in police (y_trust_idx)
est_trust_idx <- 
  lm_lin(y_trust_idx_t1 ~ Z_diversity_t1,
         covariates =  ~ y_trust_idx_t0, data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

## Willingness to cooperate (y_coop_idx) 
est_coop_idx <- 
  lm_lin(y_coop_idx_t1 ~ Z_diversity_t1,
         covariates = ~ y_coop_idx_t0,
         data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "Z_diversity_t1info")

reduced_form_est <-
  bind_rows(est_aa_idx,
            est_trolley_idx,
            est_donate,
            est_vote,
            est_trust_idx,
            est_coop_idx) %>%
  mutate(estimator = "OLS",
         estimand = "ITTy")

#### -------- 2SLS estimates for ITTy/ITTd ------------------------------- ####

## Affirmative action hiring (y_aa_idx)
est_aa_idx <-
  iv_robust(y_aa_idx_t1 ~ D | Z_diversity_t1, cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")

## Hiring minority applicants (y_trolley_idx) 
est_trolley_idx <- 
  iv_robust(y_trolley_idx_t1 ~ D | Z_diversity_t1,
            data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")

## Donations to pro-Black organization (y_donate) 
est_donate <- 
  cvs_dat %>%
  iv_robust(y_donate_n_t1 ~ D | Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")

## Political action (y_vote_dvsty)
est_vote <- 
  cvs_dat %>%
  iv_robust(y_vote_dvsty_t1 ~ D | Z_diversity_t1, data = .) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")

## Trust in police (y_trust_idx)
est_trust_idx <- 
  iv_robust(y_trust_idx_t1 ~ D + y_trust_idx_t0 | Z_diversity_t1 + y_trust_idx_t0, 
            data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")

## Willingness to cooperate (y_coop_idx) 
est_coop_idx <- 
  iv_robust(y_coop_idx_t1 ~ D + y_coop_idx_t0 | Z_diversity_t1 + y_coop_idx_t0, 
            data = cvs_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "D")


## Combine
iv_est <- 
  bind_rows(est_aa_idx, est_trolley_idx, est_donate, est_vote, 
            est_trust_idx, est_coop_idx) %>% 
  mutate(estimator = "2SLS",
         estimand = "LATE")

#### ---- Table S13: estimates from reduced-form regressions and IV ------ #####
iv_est_df <- 
  bind_rows(first_stage_est, reduced_form_est, iv_est) %>% 
  mutate(
    outcome = case_when(
      str_detect(outcome, "y_trolley_idx") ~ "Diversity hiring",
      str_detect(outcome, "y_aa_idx") ~ "Affirmative action",
      outcome == "y_vote_dvsty_t1" ~ "Political action",
      outcome == "y_donate_n_t1" ~ "Donations to pro-Black group",
      outcome == "y_trust_idx_t1" ~ "Trust in the police",
      outcome == "y_coop_idx_t1" ~ "Willingness to cooperate",
      TRUE ~ "Manipulation check"
    ),
    outcome_lab = factor(
      outcome,
      levels = c(
        "Affirmative action",
        "Diversity hiring",
        "Political action",
        "Donations to pro-Black group",
        "Trust in the police",
        "Willingness to cooperate",
        "Manipulation check"
      ),
      labels = c(
        "Support for affirmative action in recruitment and hiring",
        "Support for tie-breaking in favor of minority applicants",
        "Voted for police department diversification",
        "Donation to Black police officer association",
        "Trust and confidence in the police department",
        "Willingness to cooperate with police officers",
        "Manipulation check: belief updating"
      )
    )
  ) %>% 
  select(estimate, std.error, conf.low, conf.high, statistic, p.value, outcome, 
         estimator, estimand, outcome_lab)

## Export to Latex
iv_est_df %>% 
  filter(estimand != "ITTd") %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  dplyr::select(outcome_lab, entry, estimand) %>% 
  spread(estimand, entry) %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "iv_estimates.tex")


#### ---- Table S14: estimates for additional arms, national sample ------ ####

## Affirmative action hiring (y_aa_idx)
est_aa_idx <-
  lm_robust(y_aa_idx_s ~ Z_diversity, lucid_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3)

## Hiring minority applicants (y_trolley_idx) 
est_trolley_idx <-
  lm_robust(y_trolley_idx_s ~ Z_diversity, lucid_dat) %>%
  tidy() %>%
  mutate_if(is.numeric, round, 3) 


lucid_est_df <-
  bind_rows(est_aa_idx, est_trolley_idx) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    sample = "National sample",
    Z = factor(
      term,
      levels = c(
        "Z_diversityInfo",
        "Z_diversityScience",
        "Z_diversityInfo + Science"
      ),
      labels = c("Info", "Science", "Info + Science")
    ),
    outcome_lab = factor(
      outcome,
      levels = c("y_aa_idx_s",
                 "y_trolley_idx_s"),
      labels = c(
        "Support for affirmative action in recruitment and hiring",
        "Support for tie-breaking in favor of minority applicants"
      )
    )
  )

lucid_est_df %>%
  select(Z,
         estimate,
         std.error,
         statistic,
         p.value,
         conf.low,
         conf.high,
         outcome_lab) %>%
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  dplyr::select(Z, outcome_lab, entry) %>%
  spread(Z, entry) %>%
  xtable(.,) %>%
  print(
    include.rownames = FALSE,
    include.colnames = FALSE,
    hline.after = c(),
    only.contents = TRUE,
    type = "latex",
    file = "lucid_all_ates.tex"
  )


#### ---- Table S15: estimates for additional outcomes, municipal sample - ####
outcomes <-
  c(
    "y_policy_rank_dvsty_n_t1",
    "y_policy_supp_dvsty_n_t1",
    "y_career_idx_t1",
    "y_diverse_import_race_n_t1",
    "y_diverse_import_sex_n_t1",
    "y_diverse_usa_white_t1",
    "y_diverse_usa_black_t1",
    "y_diverse_usa_latino_t1",
    "y_diverse_usa_asian_t1",
    "y_diverse_usa_male_t1",
    "y_diverse_usa_female_t1"
  )

ate_est <- list()
for(i in 1: length(outcomes)){
  ate_est[[i]] <- 
    lm_robust(as.formula(paste0(outcomes[i], "~ Z_diversity_t1")),
              data = cvs_dat) %>% 
    tidy() %>% 
    filter(term == "Z_diversity_t1info") %>% 
    mutate_if(is.numeric, round, 3)
  
}

cvs_est_df <-
  bind_rows(ate_est) %>%
  mutate(
    statistic = estimate / std.error,
    outcome_lab = factor(
      outcome,
      levels = c(
        "y_policy_supp_dvsty_n_t1",
        "y_policy_rank_dvsty_n_t1",
        "y_career_idx_t1",
        "y_diverse_import_race_n_t1",
        "y_diverse_import_sex_n_t1",
        "y_diverse_usa_white_t1",
        "y_diverse_usa_black_t1",
        "y_diverse_usa_latino_t1",
        "y_diverse_usa_asian_t1",
        "y_diverse_usa_female_t1",
        "y_diverse_usa_male_t1"
      ),
      labels = c(
        "Stated support for diversification policy",
        "Rank ordering of diversification policy",
        "Willigness to consider policing career",
        "Importance of racial diversity",
        "Importance of gender diversity",
        "White officers",
        "Black officers",
        "Hispanic/Latino officers",
        "Asian officers",
        "Female officers",
        "Male officers"
      )
    )
  )


cvs_est_df %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value)) %>%
  dplyr::select(outcome_lab, entry) %>% 
  xtable(., ) %>%
  print(include.rownames = FALSE, 
        include.colnames = FALSE,
        hline.after = c(),
        only.contents = TRUE,
        type = "latex",
        file = "additional_ates.tex")

#### ---- Fig. S34: Causal attributions for disparities ------------------ ####

## Attribution: PDs don't want to hire minority officers
f1 <-
  cvs_dat %>%
  filter(.,!is.na(y_cattr_demand_t1)) %>%
  ggplot(., aes(x = y_cattr_demand_t1)) +
  geom_bar(fill = "gray40") +
  scale_x_discrete(
    breaks = c(
      "Strongly disagree",
      "Disagree",
      "Somewhat disagree",
      "Neither agree nor disagree",
      "Somewhat agree",
      "Agree",
      "Strongly agree"
    ),
    labels = c("Strongly \ndisagree",
               "", "",
               "",
               "",
               "", "Strongly \nagree")
  ) +
  theme_tufte(base_size = 12) +
  theme(
    axis.text.x = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(size = 0.25),
    strip.text = element_text(size = 9),
    axis.line.x = element_line(size = 0.25),
    axis.title.x = element_text(size = 10)
  ) +
  labs(subtitle = "",
       x = "Police departments do not want to recruit/hire minority officers",
       y = "Count")

## Attribution: minorities do not want to become officers
f2 <-
  cvs_dat %>%
  filter(.,!is.na(y_cattr_supply_t1)) %>%
  ggplot(., aes(x = y_cattr_supply_t1)) +
  geom_bar(fill = "gray40") +
  scale_x_discrete(
    breaks = c(
      "Strongly disagree",
      "Disagree",
      "Somewhat disagree",
      "Neither agree nor disagree",
      "Somewhat agree",
      "Agree",
      "Strongly agree"
    ),
    labels = c("Strongly \ndisagree",
               "", "",
               "",
               "",
               "", "Strongly \nagree")
  ) +
  theme_tufte(base_size = 12) +
  theme(
    axis.text.x = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(size = 0.25),
    strip.text = element_text(size = 9),
    axis.line.x = element_line(size = 0.25),
    axis.title.x = element_text(size = 10)
  ) +
  labs(subtitle = "",
       x = "Minorities do not want to become police officers",
       y = "Count")


## Attribution: ninorities do not qualify
f3 <-
  cvs_dat %>%
  filter(.,!is.na(y_cattr_qualify_t1)) %>%
  ggplot(., aes(x = y_cattr_qualify_t1)) +
  geom_bar(fill = "gray40") +
  scale_x_discrete(
    breaks = c(
      "Strongly disagree",
      "Disagree",
      "Somewhat disagree",
      "Neither agree nor disagree",
      "Somewhat agree",
      "Agree",
      "Strongly agree"
    ),
    labels = c("Strongly \ndisagree",
               "", "",
               "",
               "",
               "", "Strongly \nagree")
  ) +
  theme_tufte(base_size = 12) +
  theme(
    axis.text.x = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(size = 0.25),
    strip.text = element_text(size = 9),
    axis.line.x = element_line(size = 0.25),
    axis.title.x = element_text(size = 10)
  ) +
  labs(subtitle = "",
       x = "Minorities lack the qualifications to become police officers",
       y = "Count")


## Attribution: unsupportive working environment
f4 <-
  cvs_dat %>%
  filter(.,!is.na(y_cattr_retention_t1)) %>%
  ggplot(., aes(x = y_cattr_retention_t1)) +
  geom_bar(fill = "gray40") +
  scale_x_discrete(
    breaks = c(
      "Strongly disagree",
      "Disagree",
      "Somewhat disagree",
      "Neither agree nor disagree",
      "Somewhat agree",
      "Agree",
      "Strongly agree"
    ),
    labels = c("Strongly \ndisagree",
               "", "",
               "",
               "",
               "", "Strongly \nagree")
  ) +
  theme_tufte(base_size = 12) +
  theme(
    axis.text.x = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(size = 0.25),
    strip.text = element_text(size = 9),
    axis.line.x = element_line(size = 0.25),
    axis.title.x = element_text(size = 10)
  ) +
  labs(subtitle = "",
       x = "Police departments do not provide minority officers with a supportive working environment",
       y = "Count")

ggpubr::ggarrange(
  f1,
  f2,
  f3,
  f4,
  labels = c("A", "B", "C", "D"),
  ncol = 2,
  nrow = 2
)

ggsave(filename = "cas_att.pdf",
       width = 10,
       height = 6)

#### ---- Fig. S35: Demographic correlates of misperceptions ------------ ####

# reorder PID and create indicators for white and male
cvs_dat <-
  cvs_dat %>%
  mutate(
    x_pid_3_f = factor(x_pid_3_t0, c("Independent", "Democrat", "Republican")),
    x_white = ifelse(x_race_cat_t0 == "White", 1, 0),
    x_male = ifelse(x_female_t0 == 1, 0, 1),
    x_edu_college = case_when(
      x_edu == "No high school diploma" ~ 0,
      x_edu == "High school diploma" ~ 0,
      x_edu == "Associate's degree" ~ 0,
      x_edu == "Bachelor's degree" ~ 1,
      x_edu == "Graduate degree" ~ 1
    )
  )


# reorder PID and create indicators for white and male, college educated
lucid_dat <-
  lucid_dat %>%
  mutate(
    x_pid_3_f = case_when(
      x_pid_7n < 4 ~ "Democrat",
      x_pid_7n == 4 ~ "Independent",
      x_pid_7n > 4 ~ "Republican"
    ),
    x_pid_3_f = factor(x_pid_3_f, c("Independent", "Democrat", "Republican")),
    x_white = ifelse(x_race == "White", 1, 0),
    x_male = ifelse(x_female == 1, 0, 1),
    x_edu_college = case_when(
      x_edu == "No high school diploma" ~ 0,
      x_edu == "High school diploma" ~ 0,
      x_edu == "Associate's degree" ~ 0,
      x_edu == "Bachelor's degree" ~ 1,
      x_edu == "Graduate degree" ~ 1
    )
  )


## Regressions for municipal sample:

# White officers
ynk_white <-
  lm_robust(x_diverse_diff_white_t1 ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = cvs_dat) %>%
  tidy() %>%
  mutate(Outcome = "White Officers",
         Sample = "Municipal")

# Male officers
ynk_male <-
  lm_robust(x_diverse_diff_male_t1 ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = cvs_dat) %>%
  tidy() %>%
  mutate(Outcome = "Male Officers",
         Sample = "Municipal")

# Black officers
ynk_black <-
  lm_robust(x_diverse_diff_black_t1 ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = cvs_dat) %>%
  tidy() %>%
  mutate(Outcome = "Black Officers",
         Sample = "Municipal")

# Latino officers
ynk_latino <-
  lm_robust(x_diverse_diff_latino_t1 ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = cvs_dat) %>%
  tidy() %>%
  mutate(Outcome = "Latino Officers",
         Sample = "Municipal")

# Asian officers
ynk_asian <-
  lm_robust(x_diverse_diff_asian_t1 ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = cvs_dat) %>%
  tidy() %>%
  mutate(Outcome = "Asian Officers",
         Sample = "Municipal")

## Regressions for national sample:

# White officers
lcd_white <-
  lm_robust(x_usa_diff_white ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = lucid_dat) %>%
  tidy() %>%
  mutate(Outcome = "White Officers",
         Sample = "National")

# Male officers
lcd_male <-
  lm_robust(x_usa_diff_male ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = lucid_dat) %>%
  tidy() %>%
  mutate(Outcome = "Male Officers",
         Sample = "National")

# Black officers
lcd_black <-
  lm_robust(x_usa_diff_black ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = lucid_dat) %>%
  tidy() %>%
  mutate(Outcome = "Black Officers",
         Sample = "National")

# Latino officers
lcd_latino <-
  lm_robust(x_usa_diff_latino ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = lucid_dat) %>%
  tidy() %>%
  mutate(Outcome = "Latino Officers",
         Sample = "National")

# Asian officers
lcd_asian <-
  lm_robust(x_usa_diff_asian ~
              x_pid_3_f + x_white + x_edu_college + x_male,
            data = lucid_dat) %>%
  tidy() %>%
  mutate(Outcome = "Asian Officers",
         Sample = "National")

## Combine correlation models for plotting
corr_misperception <-
  rbind(
    lcd_white,
    lcd_male,
    lcd_black,
    lcd_latino,
    lcd_asian,
    ynk_white,
    ynk_male,
    ynk_black,
    ynk_latino,
    ynk_asian
  ) %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = case_when(
      term == "x_pid_3_fDemocrat" ~ "Democrat",
      term == "x_pid_3_fRepublican" ~ "Republican",
      term == "x_white" ~ "White",
      term == "x_edu_college" ~ "College",
      term == "x_male" ~ "Male"
    ),
    term = factor(
      term,
      levels = c("College", "Democrat", "Republican",
                 "Male", "White")
    ),
    Outcome = factor(
      Outcome,
      levels =   c(
        "White Officers",
        "Male Officers",
        "Black Officers",
        "Latino Officers",
        "Asian Officers"
      )
    )
  )

g <-
  ggplot(
    corr_misperception,
    aes(
      y = term,
      x = estimate,
      shape = Sample,
      color = Sample,
      fill = Sample
    )
  ) +
  geom_vline(
    xintercept = 0,
    color = "black",
    linetype = "solid",
    size = 0.2
  ) +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0,
    position = position_dodgev(height = 0.7),
    size = 1
  ) +
  geom_point(position = position_dodgev(height = 0.7),
             size = 2.5) +
  scale_color_manual("", values = rev(c("#888888", "#000000"))) +
  scale_fill_manual("", values = rev(c("#888888", "#000000"))) +
  scale_shape_manual("", values = rev(c(21, 22))) +
  facet_grid( ~ Outcome, scales = "free_y", space = "free") +
  theme_tufte(base_size = 14) +
  guides(
    fill = guide_legend(reverse = TRUE),
    color = guide_legend(reverse = TRUE),
    shape = guide_legend(reverse = TRUE)
  ) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(size = 0.25),
    strip.text = element_text(size = 11),
    axis.line.x = element_line(size = 0.25),
    axis.title.x = element_text(size = 10),
    legend.position = "bottom"
  ) +
  labs(subtitle = "",
       x = "Demographic Correlates of Misperception",
       y = "")

ggsave(g,
       filename = "corr_misp.pdf",
       width = 7,
       height = 5)
